library(tidyverse)
library(plotly)
library(sf)
library(mapview)
library(tigris)
library(censusapi)
library(leaflet)
library(usmap)
library(lmtest)
library(pracma)
library(lmtest)
library(forecast)
library(vars)

options(
  tigris_class = "sf",
  tigris_use_cache = TRUE
)

Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")

# Load the processing functions. This also loads the normalization functions.
source("safegraph_process_patterns_functions.R")

sg_path <- '/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/'

sg_hourly_path <- '/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/weekly-patterns/v2/hourly/'

In this Rmd, I explore the correlation between daily visits (weighted and unweighted) and Reff on a county level. I consider the month of June as an example.

First need to process visits data to get daily visits and weighted visits information for each county. Uses code from simone_processing_hourly_visits.Rmd to process the visits; note that the bay_hourly_visits_places files already exist, so I didn’t recreate those. Here I process for Alameda, San Mateo, Santa Clara, and San Francisco counties.

# # first getting the hourly visits for each county, for each day
# sf_blockgroups <-
#   block_groups("CA","San Francisco", cb=F, progress_bar=F) %>%
#   st_transform('+proj=longlat +datum=WGS84')
# 
# ac_blockgroups <-
#   block_groups("CA","Alameda", cb=F, progress_bar=F) %>%
#   st_transform('+proj=longlat +datum=WGS84')
# 
# smc_blockgroups <-
#   block_groups("CA","San Mateo", cb=F, progress_bar=F) %>%
#   st_transform('+proj=longlat +datum=WGS84')
# 
# scc_blockgroups <-
#   block_groups("CA","Santa Clara", cb=F, progress_bar=F) %>%
#   st_transform('+proj=longlat +datum=WGS84')
# 
# sf_bgs <- sf_blockgroups %>% pull(GEOID)
# 
# ac_bgs <- ac_blockgroups %>% pull(GEOID)
# 
# smc_bgs <- smc_blockgroups %>% pull(GEOID)
# 
# scc_bgs <- scc_blockgroups %>% pull(GEOID)
# 
# files_to_process <- list.files(paste0(sg_hourly_path, "ByDay/"), pattern = "day")
# 
# # select a set of the files to process, corresponding to the dates in June of interest
# files_to_process_selected1 <- files_to_process[85:91]
# files_to_process_selected2 <- files_to_process[92:105]
# files_to_process_selected3 <- files_to_process[106:119]
# 
# #bay_hourly_visits_places <- NULL
# ac_hourly_visits <- NULL
# sf_hourly_visits <- NULL
# smc_hourly_visits <- NULL
# scc_hourly_visits <- NULL
# 
# files_to_process_selected <- files_to_process_selected3 # swap based on which you want to process
# 
# for (i in 1:length(files_to_process_selected)) {
#   curr_bay_hourly_origins <- readRDS(paste0(sg_hourly_path, "ByDay/", files_to_process_selected[i])) %>% dplyr::select(safegraph_place_id, date, hour, visit_counts_hourly_high, visit_counts_hourly_low, origin_census_block_group, pop) %>%
#     filter(!is.na(pop)) %>% # remove any block groups that aren't in CA
#     filter(!is.na(visit_counts_hourly_high)) # this causes aggregation errors later if NAs show up
# 
#   # not needed
#   # # get hourly visits to each safegraph place
#   # curr_bay_hourly_visits_places <- curr_bay_hourly_origins %>%
#   #   group_by(safegraph_place_id, hour, date) %>%
#   # summarize(total_visits_hourly_high = sum(visit_counts_hourly_high), total_visits_hourly_low = sum(visit_counts_hourly_low)) %>%
#   # mutate(total_visits_hourly_avg = (total_visits_hourly_high + total_visits_hourly_low) / 2) %>%
#   # ungroup()
# 
#   # get hourly visits in Alameda to each place
#   curr_ac_hourly_visits <- curr_bay_hourly_origins %>% filter(origin_census_block_group %in% ac_bgs) %>%
#     group_by(date, hour, safegraph_place_id) %>%
#     summarize(total_visits_high = sum(visit_counts_hourly_high), total_visits_low = sum(visit_counts_hourly_low)) %>%
#     mutate(total_visits_avg = (total_visits_high + total_visits_low)/2)
# 
#   # get hourly visits in SF to each place
#   curr_sf_hourly_visits <- curr_bay_hourly_origins %>% filter(origin_census_block_group %in% sf_bgs) %>%
#     group_by(date, hour, safegraph_place_id) %>%
#     summarize(total_visits_high = sum(visit_counts_hourly_high), total_visits_low = sum(visit_counts_hourly_low)) %>%
#     mutate(total_visits_avg = (total_visits_high + total_visits_low)/2)
#   
#   # get hourly visits in SMC to each place
#   curr_smc_hourly_visits <- curr_bay_hourly_origins %>% filter(origin_census_block_group %in% smc_bgs) %>%
#     group_by(date, hour, safegraph_place_id) %>%
#     summarize(total_visits_high = sum(visit_counts_hourly_high), total_visits_low = sum(visit_counts_hourly_low)) %>%
#     mutate(total_visits_avg = (total_visits_high + total_visits_low)/2)
#   
#   # get hourly visits in SCC to each place
#   curr_scc_hourly_visits <- curr_bay_hourly_origins %>% filter(origin_census_block_group %in% scc_bgs) %>%
#     group_by(date, hour, safegraph_place_id) %>%
#     summarize(total_visits_high = sum(visit_counts_hourly_high), total_visits_low = sum(visit_counts_hourly_low)) %>%
#     mutate(total_visits_avg = (total_visits_high + total_visits_low)/2)
# 
#   # combine
#   # bay_hourly_visits_places <- rbind(bay_hourly_visits_places, curr_bay_hourly_visits_places)
#   ac_hourly_visits <- rbind(ac_hourly_visits, curr_ac_hourly_visits)
#   sf_hourly_visits <- rbind(sf_hourly_visits, curr_sf_hourly_visits)
#   smc_hourly_visits <- rbind(smc_hourly_visits, curr_smc_hourly_visits)
#   scc_hourly_visits <- rbind(scc_hourly_visits, curr_scc_hourly_visits)
# }
# 
# saveRDS(ac_hourly_visits, paste0(sg_hourly_path, "ac_hourly_visits_6-22_7-05.rds"))
# saveRDS(sf_hourly_visits, paste0(sg_hourly_path, "sf_hourly_visits_6-22_7-05.rds"))
# saveRDS(smc_hourly_visits, paste0(sg_hourly_path, "smc_hourly_visits_6-22_7-05.rds"))
# saveRDS(scc_hourly_visits, paste0(sg_hourly_path, "scc_hourly_visits_6-22_7-05.rds"))
# 
# # processing to get weighted visits
# # load the places and weights
# bay_hourly_visits_places_601_607 <- readRDS(paste0(sg_hourly_path, "bay_hourly_visits_places_6-01_6-07.rds"))
# bay_hourly_visits_places_608_621 <- readRDS(paste0(sg_hourly_path, "bay_hourly_visits_places_6-08_6-21.rds"))
# bay_hourly_visits_places_622_705 <- readRDS(paste0(sg_hourly_path, "bay_hourly_visits_places_6-22_7-05.rds"))
# # load visits data
# ac_hourly_visits_601_607 <- readRDS(paste0(sg_hourly_path, "ac_hourly_visits_6-01_6-07.rds"))
# ac_hourly_visits_608_621 <- readRDS(paste0(sg_hourly_path, "ac_hourly_visits_6-08_6-21.rds"))
# ac_hourly_visits_622_705 <- readRDS(paste0(sg_hourly_path, "ac_hourly_visits_6-22_7-05.rds"))
# sf_hourly_visits_601_607 <- readRDS(paste0(sg_hourly_path, "sf_hourly_visits_6-01_6-07.rds"))
# sf_hourly_visits_608_621 <- readRDS(paste0(sg_hourly_path, "sf_hourly_visits_6-08_6-21.rds"))
# sf_hourly_visits_622_705 <- readRDS(paste0(sg_hourly_path, "sf_hourly_visits_6-22_7-05.rds"))
# scc_hourly_visits_601_607 <- readRDS(paste0(sg_hourly_path, "scc_hourly_visits_6-01_6-07.rds"))
# scc_hourly_visits_608_621 <- readRDS(paste0(sg_hourly_path, "scc_hourly_visits_6-08_6-21.rds"))
# scc_hourly_visits_622_705 <- readRDS(paste0(sg_hourly_path, "scc_hourly_visits_6-22_7-05.rds"))
# smc_hourly_visits_601_607 <- readRDS(paste0(sg_hourly_path, "smc_hourly_visits_6-01_6-07.rds"))
# smc_hourly_visits_608_621 <- readRDS(paste0(sg_hourly_path, "smc_hourly_visits_6-08_6-21.rds"))
# smc_hourly_visits_622_705 <- readRDS(paste0(sg_hourly_path, "smc_hourly_visits_6-22_7-05.rds"))
# 
# # function to get weighted visits
# getWeightedVisits <- function(hourly_visits, bay_hourly_visits_places) {
#   hourly_visits_weights <- left_join(hourly_visits, bay_hourly_visits_places %>% dplyr::select(safegraph_place_id, hour, date, total_visits_hourly_avg, area_square_feet, hourly_visits_per_area, weekly_median_dwell) %>% rename(place_visits_per_area = hourly_visits_per_area, place_total_visits_avg = total_visits_hourly_avg)) %>%
#   mutate(weighted_visits = total_visits_avg * place_visits_per_area,
#          visit_hours = total_visits_avg * weekly_median_dwell/60, # dwell is in minutes, convert to hours
#          visits_per_area = total_visits_avg / area_square_feet,
#          weighted_visit_hours = visit_hours * place_visits_per_area,
#          weighted_visit_hours_v2 = visit_hours * place_visits_per_area * weekly_median_dwell/60 # multiply the place visits by dwell time to also convert them to visit hours, also converting to hours again
#   )
#   return(hourly_visits_weights)
# }
# 
# # get weighted visits
# ac_hourly_visits_weights_601_607 <- getWeightedVisits(ac_hourly_visits_601_607, bay_hourly_visits_places_601_607)
# ac_hourly_visits_weights_608_621 <- getWeightedVisits(ac_hourly_visits_608_621, bay_hourly_visits_places_608_621)
# ac_hourly_visits_weights_622_705 <- getWeightedVisits(ac_hourly_visits_622_705, bay_hourly_visits_places_622_705)
# sf_hourly_visits_weights_601_607 <- getWeightedVisits(sf_hourly_visits_601_607, bay_hourly_visits_places_601_607)
# sf_hourly_visits_weights_608_621 <- getWeightedVisits(sf_hourly_visits_608_621, bay_hourly_visits_places_608_621)
# sf_hourly_visits_weights_622_705 <- getWeightedVisits(sf_hourly_visits_622_705, bay_hourly_visits_places_622_705)
# scc_hourly_visits_weights_601_607 <- getWeightedVisits(scc_hourly_visits_601_607, bay_hourly_visits_places_601_607)
# scc_hourly_visits_weights_608_621 <- getWeightedVisits(scc_hourly_visits_608_621, bay_hourly_visits_places_608_621)
# scc_hourly_visits_weights_622_705 <- getWeightedVisits(scc_hourly_visits_622_705, bay_hourly_visits_places_622_705)
# smc_hourly_visits_weights_601_607 <- getWeightedVisits(smc_hourly_visits_601_607, bay_hourly_visits_places_601_607)
# smc_hourly_visits_weights_608_621 <- getWeightedVisits(smc_hourly_visits_608_621, bay_hourly_visits_places_608_621)
# smc_hourly_visits_weights_622_705 <- getWeightedVisits(smc_hourly_visits_622_705, bay_hourly_visits_places_622_705)
# 
# # summarize for each day
# summarizeWeightedVisits <- function(hourly_visits_weights) {
#   daily_weighted_visits <- hourly_visits_weights %>%
#     mutate(weighted_visits = ifelse(is.na(area_square_feet), 0, weighted_visits),
#            weighted_visit_hours = ifelse(is.na(area_square_feet), 0, weighted_visit_hours),
#            weighted_visit_hours_v2 = ifelse(is.na(area_square_feet), 0, weighted_visit_hours_v2),
#            visits_per_area = ifelse(is.na(area_square_feet), 0, visits_per_area)) %>% # handle any values that have NAs for the area of the POI, because this introduces NAs in the weighted values and visits per area. this doesn't happen often, so shouldn't affect overall results, but I do this to prevent the NAs from causing problems later
#     group_by(date) %>%
#     summarize(total_weighted_visits = sum(weighted_visits),
#               total_visit_hours = sum(visit_hours),
#               total_visits_per_area = sum(visits_per_area),
#               total_weighted_visit_hours = sum(weighted_visit_hours),
#               total_visits = sum(total_visits_avg),
#               total_weighted_visit_hours_v2 = sum(weighted_visit_hours_v2))
# 
#   return(daily_weighted_visits)
# }
# 
# ac_weighted_visits <- rbind(summarizeWeightedVisits(ac_hourly_visits_weights_601_607), summarizeWeightedVisits(ac_hourly_visits_weights_608_621), summarizeWeightedVisits(ac_hourly_visits_weights_622_705))
# 
# sf_weighted_visits <- rbind(summarizeWeightedVisits(sf_hourly_visits_weights_601_607), summarizeWeightedVisits(sf_hourly_visits_weights_608_621), summarizeWeightedVisits(sf_hourly_visits_weights_622_705))
# 
# scc_weighted_visits <- rbind(summarizeWeightedVisits(scc_hourly_visits_weights_601_607), summarizeWeightedVisits(scc_hourly_visits_weights_608_621), summarizeWeightedVisits(scc_hourly_visits_weights_622_705))
# 
# smc_weighted_visits <- rbind(summarizeWeightedVisits(smc_hourly_visits_weights_601_607), summarizeWeightedVisits(smc_hourly_visits_weights_608_621), summarizeWeightedVisits(smc_hourly_visits_weights_622_705))
# 
# saveRDS(ac_weighted_visits, paste0(sg_path, "weekly-patterns/v2/ac_weighted_visits_06-01-20_07-05-20.rds"))
# saveRDS(sf_weighted_visits, paste0(sg_path, "weekly-patterns/v2/sf_weighted_visits_06-01-20_07-05-20.rds"))
# saveRDS(scc_weighted_visits, paste0(sg_path, "weekly-patterns/v2/scc_weighted_visits_06-01-20_07-05-20.rds"))
# saveRDS(smc_weighted_visits, paste0(sg_path, "weekly-patterns/v2/smc_weighted_visits_06-01-20_07-05-20.rds"))

# load the visits data
ac_weighted_visits <- readRDS(paste0(sg_path, "weekly-patterns/v2/ac_weighted_visits_06-01-20_07-05-20.rds"))
sf_weighted_visits <- readRDS(paste0(sg_path, "weekly-patterns/v2/sf_weighted_visits_06-01-20_07-05-20.rds"))
scc_weighted_visits <- readRDS(paste0(sg_path, "weekly-patterns/v2/scc_weighted_visits_06-01-20_07-05-20.rds"))
smc_weighted_visits <- readRDS(paste0(sg_path, "weekly-patterns/v2/smc_weighted_visits_06-01-20_07-05-20.rds"))

# census data
acs_vars = readRDS("Simone_Speizer/censusData2018_acs_acs5.rds")

# define a function for pulling census data
pullCensus <- function(variableToPull, county) {
    regionString <- paste0("state:06+county:", county)
    censusData <- getCensus(
      name = "acs/acs5",
      vintage = 2018,
      region = "block group:*", 
      regionin = regionString,
      vars = variableToPull
    ) %>%
    mutate(blockgroup = paste0(state,county,tract,block_group)) %>%
      select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME"))
  
  return(censusData)
}

# weight by population, but if you aren't comparing between counties this may not be necessary
# get population data
ac_fips <- fips("CA", "Alameda") %>% substr(3,5)
sf_fips <- fips("CA", "San Francisco") %>% substr(3,5)
scc_fips <- fips("CA", "Santa Clara") %>% substr(3,5)
smc_fips <- fips("CA", "San Mateo") %>% substr(3,5)

ac_pop <- pullCensus("B01003_001E", ac_fips) %>%
  summarize(total_pop = sum(B01003_001E))
sf_pop <- pullCensus("B01003_001E", sf_fips) %>%
  summarize(total_pop = sum(B01003_001E))
scc_pop <- pullCensus("B01003_001E", scc_fips) %>%
  summarize(total_pop = sum(B01003_001E))
smc_pop <- pullCensus("B01003_001E", smc_fips) %>%
  summarize(total_pop = sum(B01003_001E))

ac_weighted_visits <- ac_weighted_visits %>%
  mutate(total_weighted_visits_per_cap = total_weighted_visits / ac_pop$total_pop,
         total_visit_hours_per_cap = total_visit_hours / ac_pop$total_pop,
         total_weighted_visit_hours_per_cap = total_weighted_visit_hours / ac_pop$total_pop,
         total_visits_per_cap = total_visits / ac_pop$total_pop
  )

sf_weighted_visits <- sf_weighted_visits %>%
  mutate(total_weighted_visits_per_cap = total_weighted_visits / sf_pop$total_pop,
         total_visit_hours_per_cap = total_visit_hours / sf_pop$total_pop,
         total_weighted_visit_hours_per_cap = total_weighted_visit_hours / sf_pop$total_pop,
         total_visits_per_cap = total_visits / sf_pop$total_pop
  )

scc_weighted_visits <- scc_weighted_visits %>%
  mutate(total_weighted_visits_per_cap = total_weighted_visits / scc_pop$total_pop,
         total_visit_hours_per_cap = total_visit_hours / scc_pop$total_pop,
         total_weighted_visit_hours_per_cap = total_weighted_visit_hours / scc_pop$total_pop,
         total_visits_per_cap = total_visits / scc_pop$total_pop
  )

smc_weighted_visits <- smc_weighted_visits %>%
  mutate(total_weighted_visits_per_cap = total_weighted_visits / smc_pop$total_pop,
         total_visit_hours_per_cap = total_visit_hours / smc_pop$total_pop,
         total_weighted_visit_hours_per_cap = total_weighted_visit_hours / smc_pop$total_pop,
         total_visits_per_cap = total_visits / smc_pop$total_pop
  )

Load the Reff data from CalCAT. Note that this is the “ensemble” value from CalCAT, which is a 3 day moving average of the median Reff on each day from various models. I will also use a 3 day moving average of the visits metrics.

ac_reff <- read_csv("Simone_Speizer/Rt_Nowcasts_Alameda_2020-08-03.csv")
names(ac_reff) <- ac_reff[2,]
ac_reff <- ac_reff[-c(1,2),] %>% 
  mutate(date = as.Date(Date))

sf_reff <- read_csv("Simone_Speizer/Rt_Nowcasts_San Francisco_2020-08-03.csv")
names(sf_reff) <- sf_reff[2,]
sf_reff <- sf_reff[-c(1,2),] %>% 
  mutate(date = as.Date(Date))

scc_reff <- read_csv("Simone_Speizer/Rt_Nowcasts_Santa Clara_2020-08-03.csv")
names(scc_reff) <- scc_reff[2,]
scc_reff <- scc_reff[-c(1,2),] %>% 
  mutate(date = as.Date(Date))

smc_reff <- read_csv("Simone_Speizer/Rt_Nowcasts_San Mateo_2020-08-03.csv")
names(smc_reff) <- smc_reff[2,]
smc_reff <- smc_reff[-c(1,2),] %>% 
  mutate(date = as.Date(Date))

Now looking at the correlations between Reff and visits. First I consider unweighted visits.

date_start <- as.Date("2020-06-01")
date_end <- as.Date("2020-06-30")

ac_visits_reff <- left_join(ac_weighted_visits, ac_reff %>% dplyr::select(date, Ensemble)) %>%
  filter(date >= date_start & date <= date_end) %>%
  mutate(county = "Alameda",
         total_visits_per_cap_mov = movavg(total_visits_per_cap, 3, "s"),
         total_visit_hours_per_cap_mov = movavg(total_visit_hours_per_cap, 3, "s"),
         total_weighted_visits_per_cap_mov = movavg(total_weighted_visits_per_cap, 3, "s"),
         total_weighted_visit_hours_per_cap_mov = movavg(total_weighted_visit_hours_per_cap, 3, "s"))

sf_visits_reff <- left_join(sf_weighted_visits, sf_reff %>% dplyr::select(date, Ensemble)) %>%
  filter(date >= date_start & date <= date_end) %>%
  mutate(county = "San Francisco",
         total_visits_per_cap_mov = movavg(total_visits_per_cap, 3, "s"),
         total_visit_hours_per_cap_mov = movavg(total_visit_hours_per_cap, 3, "s"),
         total_weighted_visits_per_cap_mov = movavg(total_weighted_visits_per_cap, 3, "s"),
         total_weighted_visit_hours_per_cap_mov = movavg(total_weighted_visit_hours_per_cap, 3, "s"))

scc_visits_reff <- left_join(scc_weighted_visits, scc_reff %>% dplyr::select(date, Ensemble)) %>%
  filter(date >= date_start & date <= date_end) %>%
  mutate(county = "Santa Clara",
         total_visits_per_cap_mov = movavg(total_visits_per_cap, 3, "s"),
         total_visit_hours_per_cap_mov = movavg(total_visit_hours_per_cap, 3, "s"),
         total_weighted_visits_per_cap_mov = movavg(total_weighted_visits_per_cap, 3, "s"),
         total_weighted_visit_hours_per_cap_mov = movavg(total_weighted_visit_hours_per_cap, 3, "s"))

smc_visits_reff <- left_join(smc_weighted_visits, smc_reff %>% dplyr::select(date, Ensemble)) %>%
  filter(date >= date_start & date <= date_end) %>%
  mutate(county = "San Mateo",
         total_visits_per_cap_mov = movavg(total_visits_per_cap, 3, "s"),
         total_visit_hours_per_cap_mov = movavg(total_visit_hours_per_cap, 3, "s"),
         total_weighted_visits_per_cap_mov = movavg(total_weighted_visits_per_cap, 3, "s"),
         total_weighted_visit_hours_per_cap_mov = movavg(total_weighted_visit_hours_per_cap, 3, "s"))

county_visits_reff <- rbind(ac_visits_reff, sf_visits_reff, scc_visits_reff, smc_visits_reff) %>%
  rename(reff = Ensemble) %>%
  mutate(reff = as.numeric(reff),
         log_reff = log(reff))

ac_visits_reff_cor <- lm(reff~total_visits_per_cap_mov, county_visits_reff %>% 
  filter(county == "Alameda"))
summary(ac_visits_reff_cor)
## 
## Call:
## lm(formula = reff ~ total_visits_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "Alameda"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.050669 -0.024427 -0.009785  0.024827  0.060854 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               0.81801    0.09105   8.984 9.69e-10 ***
## total_visits_per_cap_mov  0.50966    0.14532   3.507  0.00155 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03246 on 28 degrees of freedom
## Multiple R-squared:  0.3052, Adjusted R-squared:  0.2804 
## F-statistic:  12.3 on 1 and 28 DF,  p-value: 0.001547
sf_visits_reff_cor <- lm(reff~total_visits_per_cap_mov, county_visits_reff %>% 
  filter(county == "San Francisco"))
summary(sf_visits_reff_cor)
## 
## Call:
## lm(formula = reff ~ total_visits_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "San Francisco"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.132734 -0.066315 -0.007083  0.055964  0.153063 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                0.2160     0.2829   0.763   0.4516   
## total_visits_per_cap_mov   1.8773     0.6112   3.071   0.0047 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08422 on 28 degrees of freedom
## Multiple R-squared:  0.252,  Adjusted R-squared:  0.2253 
## F-statistic: 9.434 on 1 and 28 DF,  p-value: 0.004703
scc_visits_reff_cor <- lm(reff~total_visits_per_cap_mov, county_visits_reff %>% 
  filter(county == "Santa Clara"))
summary(scc_visits_reff_cor)
## 
## Call:
## lm(formula = reff ~ total_visits_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "Santa Clara"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.065441 -0.021517  0.004518  0.024206  0.056184 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                0.5865     0.1057   5.546 6.24e-06 ***
## total_visits_per_cap_mov   0.9037     0.1615   5.596 5.46e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03212 on 28 degrees of freedom
## Multiple R-squared:  0.5279, Adjusted R-squared:  0.5111 
## F-statistic: 31.31 on 1 and 28 DF,  p-value: 5.462e-06
smc_visits_reff_cor <- lm(reff~total_visits_per_cap_mov, county_visits_reff %>% 
  filter(county == "San Mateo"))
summary(smc_visits_reff_cor)
## 
## Call:
## lm(formula = reff ~ total_visits_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "San Mateo"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.13733 -0.05663 -0.01620  0.07124  0.15694 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                0.2211     0.2614   0.846  0.40474   
## total_visits_per_cap_mov   1.2010     0.3684   3.260  0.00292 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08523 on 28 degrees of freedom
## Multiple R-squared:  0.2752, Adjusted R-squared:  0.2493 
## F-statistic: 10.63 on 1 and 28 DF,  p-value: 0.002922
plot_ly() %>%
  add_trace(data = county_visits_reff, x = ~total_visits_per_cap_mov, y = ~reff, color = ~county, text = ~date, type = 'scatter', mode = 'markers') %>%
  add_trace(data = county_visits_reff %>% filter(county == "Alameda"), x = ~total_visits_per_cap_mov, y = fitted(ac_visits_reff_cor), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(ac_visits_reff_cor)$r.squared), line = list(color = "black", dash = "solid"), name = "Alameda fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "San Francisco"), x = ~total_visits_per_cap_mov, y = fitted(sf_visits_reff_cor), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(sf_visits_reff_cor)$r.squared), line = list(color = "black", dash = "dash"), name = "San Francisco fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "Santa Clara"), x = ~total_visits_per_cap_mov, y = fitted(scc_visits_reff_cor), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(scc_visits_reff_cor)$r.squared), line = list(color = "black", dash = "dot"), name = "Santa Clara fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "San Mateo"), x = ~total_visits_per_cap_mov, y = fitted(smc_visits_reff_cor), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(smc_visits_reff_cor)$r.squared), line = list(color = "black", dash = "longdash"), name = "San Mateo fit") %>%
  layout(xaxis = list(title = "Total visits per capita"), yaxis = list(title = "R_eff"), title = "R_eff vs Total Visits for June")

Try with weighted visits.

ac_wt_visits_reff_cor <- lm(reff~total_weighted_visits_per_cap_mov, county_visits_reff %>% 
  filter(county == "Alameda"))
summary(ac_wt_visits_reff_cor)
## 
## Call:
## lm(formula = reff ~ total_weighted_visits_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "Alameda"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03778 -0.02874 -0.01045  0.03869  0.06274 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        0.93354    0.07079  13.187 1.56e-13 ***
## total_weighted_visits_per_cap_mov 60.46354   20.99036   2.881  0.00753 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0342 on 28 degrees of freedom
## Multiple R-squared:  0.2286, Adjusted R-squared:  0.201 
## F-statistic: 8.298 on 1 and 28 DF,  p-value: 0.007532
sf_wt_visits_reff_cor <- lm(reff~total_weighted_visits_per_cap_mov, county_visits_reff %>% 
  filter(county == "San Francisco"))
summary(sf_wt_visits_reff_cor)
## 
## Call:
## lm(formula = reff ~ total_weighted_visits_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "San Francisco"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.141774 -0.086834  0.008574  0.103362  0.128974 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                         0.7951     0.3199   2.485   0.0192 *
## total_weighted_visits_per_cap_mov  75.1073    83.1518   0.903   0.3741  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09599 on 28 degrees of freedom
## Multiple R-squared:  0.02831,    Adjusted R-squared:  -0.00639 
## F-statistic: 0.8159 on 1 and 28 DF,  p-value: 0.3741
scc_wt_visits_reff_cor <- lm(reff~total_weighted_visits_per_cap_mov, county_visits_reff %>% 
  filter(county == "Santa Clara"))
summary(scc_wt_visits_reff_cor)
## 
## Call:
## lm(formula = reff ~ total_weighted_visits_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "Santa Clara"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.08449 -0.02512  0.01572  0.02718  0.05396 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        0.98476    0.09293   10.60 2.64e-11 ***
## total_weighted_visits_per_cap_mov 55.93630   26.89436    2.08   0.0468 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04351 on 28 degrees of freedom
## Multiple R-squared:  0.1338, Adjusted R-squared:  0.1029 
## F-statistic: 4.326 on 1 and 28 DF,  p-value: 0.04681
smc_wt_visits_reff_cor <- lm(reff~total_weighted_visits_per_cap_mov, county_visits_reff %>% 
  filter(county == "San Mateo"))
summary(smc_wt_visits_reff_cor)
## 
## Call:
## lm(formula = reff ~ total_weighted_visits_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "San Mateo"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.127612 -0.101159  0.003335  0.110143  0.136869 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         0.9389     0.2285   4.109 0.000313 ***
## total_weighted_visits_per_cap_mov  32.2324    55.2858   0.583 0.564550    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09951 on 28 degrees of freedom
## Multiple R-squared:  0.01199,    Adjusted R-squared:  -0.02329 
## F-statistic: 0.3399 on 1 and 28 DF,  p-value: 0.5646
plot_ly() %>%
  add_trace(data = county_visits_reff, x = ~total_weighted_visits_per_cap_mov, y = ~reff, color = ~county, text = ~date, type = 'scatter', mode = 'markers') %>%
  add_trace(data = county_visits_reff %>% filter(county == "Alameda"), x = ~total_weighted_visits_per_cap_mov, y = fitted(ac_wt_visits_reff_cor), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(ac_wt_visits_reff_cor)$r.squared), line = list(color = "black", dash = "solid"), name = "Alameda fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "San Francisco"), x = ~total_weighted_visits_per_cap_mov, y = fitted(sf_wt_visits_reff_cor), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(sf_wt_visits_reff_cor)$r.squared), line = list(color = "black", dash = "dash"), name = "San Francisco fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "Santa Clara"), x = ~total_weighted_visits_per_cap_mov, y = fitted(scc_wt_visits_reff_cor), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(scc_wt_visits_reff_cor)$r.squared), line = list(color = "black", dash = "dot"), name = "Santa Clara fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "San Mateo"), x = ~total_weighted_visits_per_cap_mov, y = fitted(smc_wt_visits_reff_cor), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(smc_wt_visits_reff_cor)$r.squared), line = list(color = "black", dash = "longdash"), name = "San Mateo fit") %>%
  layout(xaxis = list(title = "Total weighted visits per capita"), yaxis = list("R_eff"), title = "R_eff vs Total Weighted Visits for June")

Try with visit hours.

ac_visit_hrs_reff_cor <- lm(reff~total_visit_hours_per_cap_mov, county_visits_reff %>% 
  filter(county == "Alameda"))
summary(ac_visit_hrs_reff_cor)
## 
## Call:
## lm(formula = reff ~ total_visit_hours_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "Alameda"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.04172 -0.03384 -0.01428  0.04431  0.05816 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    1.09065    0.08054  13.541 8.19e-14 ***
## total_visit_hours_per_cap_mov  0.09897    0.17256   0.574    0.571    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03872 on 28 degrees of freedom
## Multiple R-squared:  0.01161,    Adjusted R-squared:  -0.02369 
## F-statistic: 0.3289 on 1 and 28 DF,  p-value: 0.5709
sf_visit_hrs_reff_cor <- lm(reff~total_visit_hours_per_cap_mov, county_visits_reff %>% 
  filter(county == "San Francisco"))
summary(sf_visit_hrs_reff_cor)
## 
## Call:
## lm(formula = reff ~ total_visit_hours_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "San Francisco"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.13307 -0.08694  0.01853  0.08028  0.14021 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.8979     0.1977   4.541 9.71e-05 ***
## total_visit_hours_per_cap_mov   0.4466     0.4735   0.943    0.354    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09586 on 28 degrees of freedom
## Multiple R-squared:  0.03079,    Adjusted R-squared:  -0.003824 
## F-statistic: 0.8895 on 1 and 28 DF,  p-value: 0.3537
scc_visit_hrs_reff_cor <- lm(reff~total_visit_hours_per_cap_mov, county_visits_reff %>% 
  filter(county == "Santa Clara"))
summary(scc_visit_hrs_reff_cor)
## 
## Call:
## lm(formula = reff ~ total_visit_hours_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "Santa Clara"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.097518 -0.007497  0.010839  0.026745  0.049196 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    0.99889    0.09336  10.699 2.12e-11 ***
## total_visit_hours_per_cap_mov  0.39787    0.20739   1.918   0.0653 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04395 on 28 degrees of freedom
## Multiple R-squared:  0.1162, Adjusted R-squared:  0.08461 
## F-statistic: 3.681 on 1 and 28 DF,  p-value: 0.0653
smc_visit_hrs_reff_cor <- lm(reff~total_visit_hours_per_cap_mov, county_visits_reff %>% 
  filter(county == "San Mateo"))
summary(smc_visit_hrs_reff_cor)
## 
## Call:
## lm(formula = reff ~ total_visit_hours_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "San Mateo"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.129162 -0.100667  0.005357  0.109561  0.138287 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.9802     0.2177   4.502 0.000108 ***
## total_visit_hours_per_cap_mov   0.1757     0.4169   0.422 0.676574    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09979 on 28 degrees of freedom
## Multiple R-squared:  0.006307,   Adjusted R-squared:  -0.02918 
## F-statistic: 0.1777 on 1 and 28 DF,  p-value: 0.6766
plot_ly() %>%
  add_trace(data = county_visits_reff, x = ~total_visit_hours_per_cap_mov, y = ~reff, color = ~county, text = ~date, type = 'scatter', mode = 'markers') %>%
  add_trace(data = county_visits_reff %>% filter(county == "Alameda"), x = ~total_visit_hours_per_cap_mov, y = fitted(ac_visit_hrs_reff_cor), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(ac_visit_hrs_reff_cor)$r.squared), line = list(color = "black", dash = "solid"), name = "Alameda fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "San Francisco"), x = ~total_visit_hours_per_cap_mov, y = fitted(sf_visit_hrs_reff_cor), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(sf_visit_hrs_reff_cor)$r.squared), line = list(color = "black", dash = "dash"), name = "San Francisco fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "Santa Clara"), x = ~total_visit_hours_per_cap_mov, y = fitted(scc_visit_hrs_reff_cor), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(scc_visit_hrs_reff_cor)$r.squared), line = list(color = "black", dash = "dot"), name = "Santa Clara fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "San Mateo"), x = ~total_visit_hours_per_cap_mov, y = fitted(smc_visit_hrs_reff_cor), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(smc_visit_hrs_reff_cor)$r.squared), line = list(color = "black", dash = "longdash"), name = "San Mateo fit") %>%
  layout(xaxis = list(title = "Total visit-hours per capita"), yaxis = list("R_eff"), title = "R_eff vs Total Visit-Hours for June")

Try with weighted visit hours.

ac_wt_visit_hrs_reff_cor <- lm(reff~total_weighted_visit_hours_per_cap_mov, county_visits_reff %>% 
  filter(county == "Alameda"))
summary(ac_wt_visit_hrs_reff_cor)
## 
## Call:
## lm(formula = reff ~ total_weighted_visit_hours_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "Alameda"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.044085 -0.021200 -0.009846  0.016006  0.075686 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                             0.95661    0.05684  16.829 3.56e-16 ***
## total_weighted_visit_hours_per_cap_mov 67.52033   21.19363   3.186  0.00353 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03336 on 28 degrees of freedom
## Multiple R-squared:  0.2661, Adjusted R-squared:  0.2398 
## F-statistic: 10.15 on 1 and 28 DF,  p-value: 0.003529
sf_wt_visit_hrs_reff_cor <- lm(reff~total_weighted_visit_hours_per_cap_mov, county_visits_reff %>% 
  filter(county == "San Francisco"))
summary(sf_wt_visit_hrs_reff_cor)
## 
## Call:
## lm(formula = reff ~ total_weighted_visit_hours_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "San Francisco"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.13518 -0.08419  0.02172  0.07756  0.15117 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                              0.8522     0.2185   3.900 0.000548 ***
## total_weighted_visit_hours_per_cap_mov  63.3128    59.5882   1.063 0.297085    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09547 on 28 degrees of freedom
## Multiple R-squared:  0.03876,    Adjusted R-squared:  0.004426 
## F-statistic: 1.129 on 1 and 28 DF,  p-value: 0.2971
scc_wt_visit_hrs_reff_cor <- lm(reff~total_weighted_visit_hours_per_cap_mov, county_visits_reff %>% 
  filter(county == "Santa Clara"))
summary(scc_wt_visit_hrs_reff_cor)
## 
## Call:
## lm(formula = reff ~ total_weighted_visit_hours_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "Santa Clara"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.089474 -0.019147  0.004341  0.028380  0.043817 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                              0.77015    0.08558   8.999 9.37e-10
## total_weighted_visit_hours_per_cap_mov 179.97901   37.72496   4.771 5.19e-05
##                                           
## (Intercept)                            ***
## total_weighted_visit_hours_per_cap_mov ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03472 on 28 degrees of freedom
## Multiple R-squared:  0.4484, Adjusted R-squared:  0.4287 
## F-statistic: 22.76 on 1 and 28 DF,  p-value: 5.189e-05
smc_wt_visit_hrs_reff_cor <- lm(reff~total_weighted_visit_hours_per_cap_mov, county_visits_reff %>% 
  filter(county == "San Mateo"))
summary(smc_wt_visit_hrs_reff_cor)
## 
## Call:
## lm(formula = reff ~ total_weighted_visit_hours_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "San Mateo"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.124339 -0.099356  0.000205  0.112781  0.135448 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                              0.9458     0.2460   3.845 0.000636 ***
## total_weighted_visit_hours_per_cap_mov  43.1766    84.1922   0.513 0.612090    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09964 on 28 degrees of freedom
## Multiple R-squared:  0.009305,   Adjusted R-squared:  -0.02608 
## F-statistic: 0.263 on 1 and 28 DF,  p-value: 0.6121
plot_ly() %>%
  add_trace(data = county_visits_reff, x = ~total_weighted_visit_hours_per_cap_mov, y = ~reff, color = ~county, text = ~date, type = 'scatter', mode = 'markers') %>%
  add_trace(data = county_visits_reff %>% filter(county == "Alameda"), x = ~total_weighted_visit_hours_per_cap_mov, y = fitted(ac_wt_visit_hrs_reff_cor), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(ac_wt_visit_hrs_reff_cor)$r.squared), line = list(color = "black", dash = "solid"), name = "Alameda fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "San Francisco"), x = ~total_weighted_visit_hours_per_cap_mov, y = fitted(sf_wt_visit_hrs_reff_cor), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(sf_wt_visit_hrs_reff_cor)$r.squared), line = list(color = "black", dash = "dash"), name = "San Francisco fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "Santa Clara"), x = ~total_weighted_visit_hours_per_cap_mov, y = fitted(scc_wt_visit_hrs_reff_cor), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(scc_wt_visit_hrs_reff_cor)$r.squared), line = list(color = "black", dash = "dot"), name = "Santa Clara fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "San Mateo"), x = ~total_weighted_visit_hours_per_cap_mov, y = fitted(smc_wt_visit_hrs_reff_cor), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(smc_wt_visit_hrs_reff_cor)$r.squared), line = list(color = "black", dash = "longdash"), name = "San Mateo fit") %>%
  layout(xaxis = list(title = "Total weighted visit-hours per capita"), yaxis = list("R_eff"), title = "R_eff vs Total Weighted Visit-Hours for June")

Interesting. So it seems that total visits (unweighted) generally do better, though weighted visit-hours also do well for a couple counties in particular (Santa Clara and Alameda). Santa Clara and Alameda also tend to have the best fits overall.

Now try with the log of Reff.

Again first unweighted visits.

ac_visits_reff_cor_log <- lm(log_reff~total_visits_per_cap_mov, county_visits_reff %>% 
  filter(county == "Alameda"))
summary(ac_visits_reff_cor_log)
## 
## Call:
## lm(formula = log_reff ~ total_visits_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "Alameda"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.044562 -0.020959 -0.008391  0.021414  0.053010 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)   
## (Intercept)              -0.15269    0.07929  -1.926  0.06436 . 
## total_visits_per_cap_mov  0.44824    0.12656   3.542  0.00141 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02827 on 28 degrees of freedom
## Multiple R-squared:  0.3094, Adjusted R-squared:  0.2847 
## F-statistic: 12.54 on 1 and 28 DF,  p-value: 0.001414
sf_visits_reff_cor_log <- lm(log_reff~total_visits_per_cap_mov, county_visits_reff %>% 
  filter(county == "San Francisco"))
summary(sf_visits_reff_cor_log)
## 
## Call:
## lm(formula = log_reff ~ total_visits_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "San Francisco"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.125883 -0.063452 -0.004567  0.050793  0.140308 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)   
## (Intercept)               -0.7390     0.2625  -2.815  0.00882 **
## total_visits_per_cap_mov   1.7646     0.5671   3.112  0.00425 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07814 on 28 degrees of freedom
## Multiple R-squared:  0.2569, Adjusted R-squared:  0.2304 
## F-statistic: 9.682 on 1 and 28 DF,  p-value: 0.004254
scc_visits_reff_cor_log <- lm(log_reff~total_visits_per_cap_mov, county_visits_reff %>% 
  filter(county == "Santa Clara"))
summary(scc_visits_reff_cor_log)
## 
## Call:
## lm(formula = log_reff ~ total_visits_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "Santa Clara"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.056222 -0.018590  0.003986  0.021662  0.048595 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -0.35169    0.09208  -3.820  0.00068 ***
## total_visits_per_cap_mov  0.78652    0.14063   5.593  5.5e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02797 on 28 degrees of freedom
## Multiple R-squared:  0.5277, Adjusted R-squared:  0.5108 
## F-statistic: 31.28 on 1 and 28 DF,  p-value: 5.504e-06
smc_visits_reff_cor_log <- lm(log_reff~total_visits_per_cap_mov, county_visits_reff %>% 
  filter(county == "San Mateo"))
summary(smc_visits_reff_cor_log)
## 
## Call:
## lm(formula = log_reff ~ total_visits_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "San Mateo"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.13118 -0.05053 -0.01237  0.06345  0.14409 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)   
## (Intercept)               -0.7353     0.2430  -3.026  0.00527 **
## total_visits_per_cap_mov   1.1302     0.3425   3.299  0.00264 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07925 on 28 degrees of freedom
## Multiple R-squared:   0.28,  Adjusted R-squared:  0.2542 
## F-statistic: 10.89 on 1 and 28 DF,  p-value: 0.002644
plot_ly() %>%
  add_trace(data = county_visits_reff, x = ~total_visits_per_cap_mov, y = ~log_reff, color = ~county, text = ~date, type = 'scatter', mode = 'markers') %>%
  add_trace(data = county_visits_reff %>% filter(county == "Alameda"), x = ~total_visits_per_cap_mov, y = fitted(ac_visits_reff_cor_log), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(ac_visits_reff_cor_log)$r.squared), line = list(color = "black", dash = "solid"), name = "Alameda fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "San Francisco"), x = ~total_visits_per_cap_mov, y = fitted(sf_visits_reff_cor_log), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(sf_visits_reff_cor_log)$r.squared), line = list(color = "black", dash = "dash"), name = "San Francisco fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "Santa Clara"), x = ~total_visits_per_cap_mov, y = fitted(scc_visits_reff_cor_log), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(scc_visits_reff_cor_log)$r.squared), line = list(color = "black", dash = "dot"), name = "Santa Clara fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "San Mateo"), x = ~total_visits_per_cap_mov, y = fitted(smc_visits_reff_cor_log), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(smc_visits_reff_cor_log)$r.squared), line = list(color = "black", dash = "longdash"), name = "San Mateo fit") %>%
  layout(xaxis = list(title = "Total visits per capita"), yaxis = list(title = "log(R_eff)"), title = "Log of R_eff vs Total Visits for June")

Try with weighted visits.

ac_wt_visits_reff_cor_log <- lm(log_reff~total_weighted_visits_per_cap_mov, county_visits_reff %>% 
  filter(county == "Alameda"))
summary(ac_wt_visits_reff_cor_log)
## 
## Call:
## lm(formula = log_reff ~ total_weighted_visits_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "Alameda"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.033228 -0.025274 -0.009042  0.033504  0.054679 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                       -0.05126    0.06170  -0.831  0.41311   
## total_weighted_visits_per_cap_mov 53.22593   18.29281   2.910  0.00702 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02981 on 28 degrees of freedom
## Multiple R-squared:  0.2322, Adjusted R-squared:  0.2047 
## F-statistic: 8.466 on 1 and 28 DF,  p-value: 0.007015
sf_wt_visits_reff_cor_log <- lm(log_reff~total_weighted_visits_per_cap_mov, county_visits_reff %>% 
  filter(county == "San Francisco"))
summary(sf_wt_visits_reff_cor_log)
## 
## Call:
## lm(formula = log_reff ~ total_weighted_visits_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "San Francisco"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.13518 -0.08075  0.01180  0.09484  0.11822 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)
## (Intercept)                        -0.2074     0.2973  -0.698    0.491
## total_weighted_visits_per_cap_mov  73.9137    77.2725   0.957    0.347
## 
## Residual standard error: 0.0892 on 28 degrees of freedom
## Multiple R-squared:  0.03164,    Adjusted R-squared:  -0.002941 
## F-statistic: 0.915 on 1 and 28 DF,  p-value: 0.347
scc_wt_visits_reff_cor_log <- lm(log_reff~total_weighted_visits_per_cap_mov, county_visits_reff %>% 
  filter(county == "Santa Clara"))
summary(scc_wt_visits_reff_cor_log)
## 
## Call:
## lm(formula = log_reff ~ total_weighted_visits_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "Santa Clara"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.07312 -0.02130  0.01412  0.02329  0.04691 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                       -0.007374   0.080728  -0.091   0.9279  
## total_weighted_visits_per_cap_mov 49.341182  23.363360   2.112   0.0437 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03779 on 28 degrees of freedom
## Multiple R-squared:  0.1374, Adjusted R-squared:  0.1066 
## F-statistic:  4.46 on 1 and 28 DF,  p-value: 0.04375
smc_wt_visits_reff_cor_log <- lm(log_reff~total_weighted_visits_per_cap_mov, county_visits_reff %>% 
  filter(county == "San Mateo"))
summary(smc_wt_visits_reff_cor_log)
## 
## Call:
## lm(formula = log_reff ~ total_weighted_visits_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "San Mateo"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.122230 -0.094979  0.007389  0.102442  0.125495 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)
## (Intercept)                       -0.06425    0.21305  -0.302    0.765
## total_weighted_visits_per_cap_mov 31.40755   51.55110   0.609    0.547
## 
## Residual standard error: 0.09279 on 28 degrees of freedom
## Multiple R-squared:  0.01308,    Adjusted R-squared:  -0.02216 
## F-statistic: 0.3712 on 1 and 28 DF,  p-value: 0.5473
plot_ly() %>%
  add_trace(data = county_visits_reff, x = ~total_weighted_visits_per_cap_mov, y = ~log_reff, color = ~county, text = ~date, type = 'scatter', mode = 'markers') %>%
  add_trace(data = county_visits_reff %>% filter(county == "Alameda"), x = ~total_weighted_visits_per_cap_mov, y = fitted(ac_wt_visits_reff_cor_log), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(ac_wt_visits_reff_cor_log)$r.squared), line = list(color = "black", dash = "solid"), name = "Alameda fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "San Francisco"), x = ~total_weighted_visits_per_cap_mov, y = fitted(sf_wt_visits_reff_cor_log), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(sf_wt_visits_reff_cor_log)$r.squared), line = list(color = "black", dash = "dash"), name = "San Francisco fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "Santa Clara"), x = ~total_weighted_visits_per_cap_mov, y = fitted(scc_wt_visits_reff_cor_log), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(scc_wt_visits_reff_cor_log)$r.squared), line = list(color = "black", dash = "dot"), name = "Santa Clara fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "San Mateo"), x = ~total_weighted_visits_per_cap_mov, y = fitted(smc_wt_visits_reff_cor_log), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(smc_wt_visits_reff_cor_log)$r.squared), line = list(color = "black", dash = "longdash"), name = "San Mateo fit") %>%
  layout(xaxis = list(title = "Total weighted visits per capita"), yaxis = list("log(R_eff)"), title = "Log of R_eff vs Total Weighted Visits for June")

Try with visit hours.

ac_visit_hrs_reff_cor_log <- lm(log_reff~total_visit_hours_per_cap_mov, county_visits_reff %>% 
  filter(county == "Alameda"))
summary(ac_visit_hrs_reff_cor_log)
## 
## Call:
## lm(formula = log_reff ~ total_visit_hours_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "Alameda"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03676 -0.02969 -0.01209  0.03845  0.05071 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)
## (Intercept)                    0.08641    0.07034   1.228    0.229
## total_visit_hours_per_cap_mov  0.08850    0.15069   0.587    0.562
## 
## Residual standard error: 0.03381 on 28 degrees of freedom
## Multiple R-squared:  0.01217,    Adjusted R-squared:  -0.02311 
## F-statistic: 0.3449 on 1 and 28 DF,  p-value: 0.5617
sf_visit_hrs_reff_cor_log <- lm(log_reff~total_visit_hours_per_cap_mov, county_visits_reff %>% 
  filter(county == "San Francisco"))
summary(sf_visit_hrs_reff_cor_log)
## 
## Call:
## lm(formula = log_reff ~ total_visit_hours_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "San Francisco"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.12605 -0.08181  0.02064  0.07333  0.12786 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)
## (Intercept)                   -0.09517    0.18410  -0.517    0.609
## total_visit_hours_per_cap_mov  0.41283    0.44088   0.936    0.357
## 
## Residual standard error: 0.08926 on 28 degrees of freedom
## Multiple R-squared:  0.03036,    Adjusted R-squared:  -0.004267 
## F-statistic: 0.8768 on 1 and 28 DF,  p-value: 0.3571
scc_visit_hrs_reff_cor_log <- lm(log_reff~total_visit_hours_per_cap_mov, county_visits_reff %>% 
  filter(county == "Santa Clara"))
summary(scc_visit_hrs_reff_cor_log)
## 
## Call:
## lm(formula = log_reff ~ total_visit_hours_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "Santa Clara"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.086380 -0.005774  0.009330  0.023264  0.042364 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                   0.008812   0.081383   0.108   0.9145  
## total_visit_hours_per_cap_mov 0.342654   0.180786   1.895   0.0684 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03831 on 28 degrees of freedom
## Multiple R-squared:  0.1137, Adjusted R-squared:  0.08206 
## F-statistic: 3.592 on 1 and 28 DF,  p-value: 0.06841
smc_visit_hrs_reff_cor_log <- lm(log_reff~total_visit_hours_per_cap_mov, county_visits_reff %>% 
  filter(county == "San Mateo"))
summary(smc_visit_hrs_reff_cor_log)
## 
## Call:
## lm(formula = log_reff ~ total_visit_hours_per_cap_mov, data = county_visits_reff %>% 
##     filter(county == "San Mateo"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.123739 -0.094500  0.009358  0.100898  0.126876 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)
## (Intercept)                   -0.02395    0.20306  -0.118    0.907
## total_visit_hours_per_cap_mov  0.17121    0.38882   0.440    0.663
## 
## Residual standard error: 0.09308 on 28 degrees of freedom
## Multiple R-squared:  0.006877,   Adjusted R-squared:  -0.02859 
## F-statistic: 0.1939 on 1 and 28 DF,  p-value: 0.6631
plot_ly() %>%
  add_trace(data = county_visits_reff, x = ~total_visit_hours_per_cap_mov, y = ~log_reff, color = ~county, text = ~date, type = 'scatter', mode = 'markers') %>%
  add_trace(data = county_visits_reff %>% filter(county == "Alameda"), x = ~total_visit_hours_per_cap_mov, y = fitted(ac_visit_hrs_reff_cor_log), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(ac_visit_hrs_reff_cor_log)$r.squared), line = list(color = "black", dash = "solid"), name = "Alameda fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "San Francisco"), x = ~total_visit_hours_per_cap_mov, y = fitted(sf_visit_hrs_reff_cor_log), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(sf_visit_hrs_reff_cor_log)$r.squared), line = list(color = "black", dash = "dash"), name = "San Francisco fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "Santa Clara"), x = ~total_visit_hours_per_cap_mov, y = fitted(scc_visit_hrs_reff_cor_log), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(scc_visit_hrs_reff_cor_log)$r.squared), line = list(color = "black", dash = "dot"), name = "Santa Clara fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "San Mateo"), x = ~total_visit_hours_per_cap_mov, y = fitted(smc_visit_hrs_reff_cor_log), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(smc_visit_hrs_reff_cor_log)$r.squared), line = list(color = "black", dash = "longdash"), name = "San Mateo fit") %>%
  layout(xaxis = list(title = "Total visit-hours per capita"), yaxis = list("log(R_eff)"), title = "Log of R_eff vs Total Visit-Hours for June")

Try with weighted visit hours.

ac_wt_visit_hrs_reff_cor_log <- lm(log_reff~total_weighted_visit_hours_per_cap_mov, county_visits_reff %>% 
  filter(county == "Alameda"))
summary(ac_wt_visit_hrs_reff_cor_log)
## 
## Call:
## lm(formula = log_reff ~ total_weighted_visit_hours_per_cap_mov, 
##     data = county_visits_reff %>% filter(county == "Alameda"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.038758 -0.018324 -0.008191  0.013889  0.066013 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                            -0.03047    0.04957  -0.615  0.54375   
## total_weighted_visit_hours_per_cap_mov 59.25949   18.48087   3.207  0.00335 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02909 on 28 degrees of freedom
## Multiple R-squared:  0.2686, Adjusted R-squared:  0.2425 
## F-statistic: 10.28 on 1 and 28 DF,  p-value: 0.003349
sf_wt_visit_hrs_reff_cor_log <- lm(log_reff~total_weighted_visit_hours_per_cap_mov, county_visits_reff %>% 
  filter(county == "San Francisco"))
summary(sf_wt_visit_hrs_reff_cor_log)
## 
## Call:
## lm(formula = log_reff ~ total_weighted_visit_hours_per_cap_mov, 
##     data = county_visits_reff %>% filter(county == "San Francisco"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.12792 -0.07748  0.02355  0.07207  0.13774 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)
## (Intercept)                             -0.1357     0.2035  -0.667    0.510
## total_weighted_visit_hours_per_cap_mov  58.0643    55.5030   1.046    0.304
## 
## Residual standard error: 0.08892 on 28 degrees of freedom
## Multiple R-squared:  0.03762,    Adjusted R-squared:  0.003245 
## F-statistic: 1.094 on 1 and 28 DF,  p-value: 0.3044
scc_wt_visit_hrs_reff_cor_log <- lm(log_reff~total_weighted_visit_hours_per_cap_mov, county_visits_reff %>% 
  filter(county == "Santa Clara"))
summary(scc_wt_visit_hrs_reff_cor_log)
## 
## Call:
## lm(formula = log_reff ~ total_weighted_visit_hours_per_cap_mov, 
##     data = county_visits_reff %>% filter(county == "Santa Clara"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.077234 -0.016738  0.003701  0.024711  0.037867 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                             -0.1931     0.0743  -2.599   0.0147 *  
## total_weighted_visit_hours_per_cap_mov 157.1873    32.7523   4.799  4.8e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03014 on 28 degrees of freedom
## Multiple R-squared:  0.4513, Adjusted R-squared:  0.4317 
## F-statistic: 23.03 on 1 and 28 DF,  p-value: 4.801e-05
smc_wt_visit_hrs_reff_cor_log <- lm(log_reff~total_weighted_visit_hours_per_cap_mov, county_visits_reff %>% 
  filter(county == "San Mateo"))
summary(smc_wt_visit_hrs_reff_cor_log)
## 
## Call:
## lm(formula = log_reff ~ total_weighted_visit_hours_per_cap_mov, 
##     data = county_visits_reff %>% filter(county == "San Mateo"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.119191 -0.093334  0.004444  0.105403  0.124144 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)
## (Intercept)                             -0.0645     0.2293  -0.281    0.781
## total_weighted_visit_hours_per_cap_mov  44.4889    78.4670   0.567    0.575
## 
## Residual standard error: 0.09287 on 28 degrees of freedom
## Multiple R-squared:  0.01135,    Adjusted R-squared:  -0.02396 
## F-statistic: 0.3215 on 1 and 28 DF,  p-value: 0.5752
plot_ly() %>%
  add_trace(data = county_visits_reff, x = ~total_weighted_visit_hours_per_cap_mov, y = ~log_reff, color = ~county, text = ~date, type = 'scatter', mode = 'markers') %>%
  add_trace(data = county_visits_reff %>% filter(county == "Alameda"), x = ~total_weighted_visit_hours_per_cap_mov, y = fitted(ac_wt_visit_hrs_reff_cor_log), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(ac_wt_visit_hrs_reff_cor_log)$r.squared), line = list(color = "black", dash = "solid"), name = "Alameda fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "San Francisco"), x = ~total_weighted_visit_hours_per_cap_mov, y = fitted(sf_wt_visit_hrs_reff_cor_log), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(sf_wt_visit_hrs_reff_cor_log)$r.squared), line = list(color = "black", dash = "dash"), name = "San Francisco fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "Santa Clara"), x = ~total_weighted_visit_hours_per_cap_mov, y = fitted(scc_wt_visit_hrs_reff_cor_log), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(scc_wt_visit_hrs_reff_cor_log)$r.squared), line = list(color = "black", dash = "dot"), name = "Santa Clara fit") %>%
  add_trace(data = county_visits_reff %>% filter(county == "San Mateo"), x = ~total_weighted_visit_hours_per_cap_mov, y = fitted(smc_wt_visit_hrs_reff_cor_log), type = "scatter", mode = 'lines', text = paste0("R squared ", summary(smc_wt_visit_hrs_reff_cor_log)$r.squared), line = list(color = "black", dash = "longdash"), name = "San Mateo fit") %>%
  layout(xaxis = list(title = "Total weighted visit-hours per capita"), yaxis = list("log(R_eff)"), title = "Log of R_eff vs Total Weighted Visit-Hours for June")

The log didn’t seem to affect results much.